{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 使用sklearn实现线性回归"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn import linear_model\n",
    "from sklearn.preprocessing import StandardScaler#引入归一化的包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "def linearRegression():\n",
    "    print (u\"加载数据...\\n\")\n",
    "    data = loadtxtAndcsv_data(\"train.txt\",\",\",np.float64)  #读取数据\n",
    "    X = np.array(data[:,0:-1],dtype=np.float64)      # X对应0到倒数第2列                  \n",
    "    y = np.array(data[:,-1],dtype=np.float64)        # y对应最后一列  \n",
    "        \n",
    "    # 归一化操作\n",
    "    scaler = StandardScaler()   \n",
    "    scaler.fit(X)\n",
    "    x_train = scaler.transform(X)\n",
    "    x_test = scaler.transform(np.array([1650,3]).reshape(1,-1))\n",
    "    \n",
    "    # 线性模型拟合\n",
    "    model = linear_model.LinearRegression()\n",
    "    model.fit(x_train, y)\n",
    "    \n",
    "    #预测结果\n",
    "    result = model.predict(x_test)\n",
    "    print ('model_coef',model.coef_)       # Coefficient of the features 决策函数中的特征系数\n",
    "    print ('model_intercept',model.intercept_)  # 又名bias偏置,若设置为False，则为0\n",
    "    print ('result',result)            # 预测结果\n",
    "    \n",
    "# 加载txt和csv文件\n",
    "def loadtxtAndcsv_data(fileName,split,dataType):\n",
    "    return np.loadtxt(fileName,delimiter=split,dtype=dataType)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "加载数据...\n",
      "\n",
      "model_coef [109447.79646964  -6578.35485416]\n",
      "model_intercept 340412.6595744681\n",
      "result [293081.4643349]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:475: DataConversionWarning: Data with input dtype int32 was converted to float64 by StandardScaler.\n",
      "  warnings.warn(msg, DataConversionWarning)\n"
     ]
    }
   ],
   "source": [
    "linearRegression()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
